We need a bunch of these…
First jsonlite for processing the colours data pulled from the website
Next, a bunch of data munging packages from the ‘tidyverse’
And finally the R (vector) spatial packages
See the Dulux website for what this is all about
First I had a poke around on the website to figure out where the colour details were to be found
The loop on the next slide
bind_rowscolours <- list()
for (i in 1:length(colour_groups)) {
colour_group <- colour_groups[i]
json_url <- str_c(base_url, colour_group)
json_file_name <- str_c(colour_group, ".json")
json <- fromJSON(json_url, flatten = TRUE)
# make a local copy (just for convenience)
write_json(json, json_file_name)
# get the colours information and add to list
the_colours <- rbind.fill(json$categoryColours$masterColour.colours)
colours[[i]] <- the_colours
Sys.sleep(0.5) # pause to not annoy the the server
}
df_colours <- bind_rows(colours)
write.csv(df_colours, "dulux-colours-raw.csv", row.names = FALSE)## id red green blue lrv baseId name woodType coats
## 1 149253 205 210 206 67 vivid_white Pukaki Quarter None NA
## 2 149254 220 240 242 86 vivid_white Canoe Bay None NA
## 3 149255 226 240 245 87 vivid_white Mt Dobson None NA
## 4 149256 220 230 235 80 vivid_white Raetihi None NA
## 5 149257 217 219 223 74 vivid_white Taiaroa Head None NA
## 6 149258 180 200 219 60 vivid_white Gulf Harbour None NA
There are paint names with modifiers as suffixes for different shades of particular colours, and we need to handle this
The modifiers are
Here’s one way to clean this up (there are others…)
df_colours_tidied <- df_colours %>%
## remove some columns we won't be needing
select(-id, -baseId, -woodType, -coats) %>%
## separate the name components, filling from the left with NAs if <5
separate(name, into = c("p1", "p2", "p3", "p4", "p5"), sep = " ",
remove = FALSE, fill = "left") %>%
## replace any NAs with an empty string
mutate(p1 = str_replace_na(p1, ""),
p2 = str_replace_na(p2, ""),
p3 = str_replace_na(p3, ""),
p4 = str_replace_na(p4, "")) %>%
## if p5 is a paint modifiers, then recompose name
## from p1:p4 else from p1:p5
## similarly keep modifier where it exists
mutate(placename = if_else(p5 %in% paint_modifiers,
str_trim(str_c(p1, p2, p3, p4, sep = " ")),
str_trim(str_c(p1, p2, p3, p4, p5, sep = " "))),
modifier = if_else(p5 %in% paint_modifiers,
p5, "")) %>%
## remove some places that are tricky to deal with later
filter(!placename %in% c("Chatham Islands",
"Passage Rock",
"Auckland Islands",
"Cossack Rock")) %>%
## throw away variables we no longer and reorder
select(name, placename, modifier, red, green, blue)
# save it so we have it for later
write.csv(df_colours_tidied, "dulux-colours.csv", row.names = FALSE)Add x and y columns to our data for the coordinates—note that we reload from the saved file so as not to keep hitting the Dulux website
tmaptools::geocode_OSMCode on the next slide:
x y coordinates as we have space for (due to the modifiers) from the geocoding resultsBest not to re-run this (it takes a good 10 minutes and it’s not good to repeatedly geocode and hit the OSM server)
for (placename in unique(df_colours_tidied_xy$placename)) {
address <- str_c(placename, "New Zealand", sep = ", ")
geocode <- geocode_OSM(address, as.data.frame = TRUE, return.first.only = FALSE)
num_geocodes <- nrow(geocode)
matching_rows <- which(df_colours_tidied_xy$placename == placename)
for (i in 1:length(matching_rows)) {
if (!is.null(geocode)) {
if (num_geocodes >= i) {
df_colours_tidied_xy[matching_rows[i], ]$x <- geocode$lon[i]
df_colours_tidied_xy[matching_rows[i], ]$y <- geocode$lat[i]
}
}
}
Sys.sleep(0.5) # so as not to over-tax the geocoder
}Another tidy up removing anything that didn’t get geocoded
Make the dataframe into a sf point dataset
dulux_colours_sf <- st_as_sf(dulux_colours,
coords = c("x", "y"), # columns with the coordinates
crs = 4326) %>% # EPSG:4326 for lng-lat
st_transform(2193) %>% # convert to NZTM
## and make an RGB column
mutate(rgb = rgb(red / 255, green / 255, blue/ 255))
# jitter any duplicate locations
duplicate_pts <- which(duplicated(dulux_colours_sf$geometry) |
duplicated(dulux_colours_sf$geometry, fromLast = TRUE))
jittered_pts <- dulux_colours_sf %>%
slice(duplicate_pts) %>%
st_jitter(50)
dulux_colours_sf[duplicate_pts, ]$geometry <- jittered_pts$geometry
st_write(dulux_colours_sf, "dulux-colours-pts.gpkg", delete_dsn = TRUE)Points aren’t really much fun
Instead, make up Voronois and clip to NZ
sf and tmap (for basic geospatial)